package com.gt.web.srv.utils;

/**
 * <p>
 * SpiderUtil
 * </p>
 *
 * @author: WeiHuan
 * @version:1.0
 * @date: 2024/8/5 16:41
 * 注意：本内容仅限于公司内部传阅，禁止外泄以及用于其他的商业目的
 */


import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

/**
 * Article scraping utility for WeChat Official Account articles.
 *
 * <p>All members are static; the class holds no state.
 *
 * @author ZhangYuanqiang
 * @since 2020/01/04
 */
public class SpiderUtil {

	// Origin (scheme + host) that every accepted article link must point at
	private static final String WX_DOMAIN = "https://mp.weixin.qq.com";
	// Shared key constants for the article data returned to the front end
	private static final String KEY_TITLE = "title"; // article title
	private static final String KEY_COVER_URL = "coverLink"; // article cover image link
	private static final String KEY_REFER_NAME = "referName"; // article source author
	private static final String KEY_REFER_URL = "referLink"; // article source link
	private static final String KEY_TAGS = "tags"; // article content
	private static final String KEY_NAME = "name"; // tag name
	private static final String KEY_TEXT = "text"; // text content
	private static final String KEY_HREF = "href"; // link of an <a> tag

	/**
	 * Manual smoke test: downloads one sample article and prints the page HTML
	 * followed by the cover image URL taken from the {@code og:image} meta tag.
	 *
	 * @param args unused
	 * @throws IOException if the page cannot be fetched
	 */
	public static void main(String[] args) throws IOException {
		Document doc = Jsoup.connect("https://mp.weixin.qq.com/s/CFXzXxJ74HAWWO33CZYDMw").get();

		// first() yields null when no meta tag matches, so guard before reading the attribute
		Element coverMeta = doc.getElementsByTag("meta").select("meta[property=og:image]").first();
		String imgurl = (coverMeta == null) ? null : coverMeta.attr("content");

		System.out.println("mydata = " + doc.html());
		System.out.println("imgurl = " + imgurl);
	}

	/**
	 * Checks whether an article link is acceptable.
	 *
	 * <p>A link is accepted only when it starts with {@code https://mp.weixin.qq.com} and the
	 * host really ends there: the prefix must be followed by the end of the string, by
	 * {@code /}, {@code ?} or {@code #}, or by a numeric port that is itself followed by one of
	 * those. A plain prefix match would also accept look-alike hosts such as
	 * {@code https://mp.weixin.qq.com.evil.example/} or userinfo tricks such as
	 * {@code https://mp.weixin.qq.com:80@evil.example/}.
	 *
	 * @param url the link to validate, may be {@code null}
	 * @return {@code null} when the link is acceptable, otherwise a user-facing error message
	 */
	public static String checkUrl(String url) {
		if (url == null) {
			return "请输入文章链接";
		}
		final String notWxArticle = "请输入微信公众号文章链接";
		if (!url.startsWith(WX_DOMAIN)) {
			return notWxArticle;
		}
		int index = WX_DOMAIN.length();
		// Optional explicit port: ':' followed by at least one ASCII digit
		if (index < url.length() && url.charAt(index) == ':') {
			int portStart = index + 1;
			index = portStart;
			while (index < url.length() && isAsciiDigit(url.charAt(index))) {
				index++;
			}
			if (index == portStart) {
				return notWxArticle;
			}
		}
		return endsAuthority(url, index) ? null : notWxArticle;
	}

	/** Returns true when {@code c} is one of the ASCII digits 0-9. */
	private static boolean isAsciiDigit(char c) {
		return c >= '0' && c <= '9';
	}

	/** Returns true when position {@code index} is the end of {@code url} or starts its path, query or fragment. */
	private static boolean endsAuthority(String url, int index) {
		if (index >= url.length()) {
			return true;
		}
		char c = url.charAt(index);
		return c == '/' || c == '?' || c == '#';
	}

	/**
	 * Builds the request headers used for WeChat Official Account requests.
	 *
	 * <p>A new mutable map is created on every call. Entries iterate in the order they are
	 * inserted below. Note that {@code If-Modified-Since} is a fixed 2020 timestamp.
	 *
	 * @return header name to header value
	 */
	public static Map<String, String> getWxHeaderMap() {
		Map<String, String> map = new LinkedHashMap<>();
		map.put("Accept", "text/html, application/xhtml+xml, image/jxr, */*");
		map.put("Accept-Encoding", "gzip, deflate");
		map.put("Accept-Language", "zh-Hans-CN, zh-Hans; q=0.8, en-US; q=0.5, en; q=0.3");
		map.put("Host", "mp.weixin.qq.com");
		map.put("If-Modified-Since", "Sat, 04 Jan 2020 12:23:43 GMT");
		map.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko");
		return map;
	}

}